library(data.table)
library(lubridate)
library(stringi)
library(stringr)
library(ggplot2)
library(ggthemes)
library(scales)

# Declare working directory beforehand in an environment variable
# IMPERIAL_LEGAL_POLITICS_REPLICATION_PATH = "path_to_your_folder"
# with the aid of usethis::edit_r_environ()
# Restart R session for the changes to take effect
# Read the replication folder from the environment and make it the working
# directory, so that the relative "data/" and "tables/" paths below resolve.
path <- Sys.getenv("IMPERIAL_LEGAL_POLITICS_REPLICATION_PATH")
# Sys.getenv() returns "" for an unset variable; fail early with an actionable
# message instead of letting setwd() raise a generic error.
if (!nzchar(path) || !dir.exists(path)) {
	stop(
		"IMPERIAL_LEGAL_POLITICS_REPLICATION_PATH is unset or does not point to an existing directory. ",
		"Set it in .Renviron (e.g. via usethis::edit_r_environ()) and restart the R session.",
		call. = FALSE
	)
}
setwd(path)

# Restore the case-instance-side-outcome-level object saved in the .rdata file
# (the code below expects it to be named krasnodar_case_outcomes_side_instance)
load(file = "data/krasnodar_case_outcomes_side_instance.rdata")

##############################
# Aggregate data at case level (with first instance taking precedence)

# Columns carried over to the case-level table: a fixed list of case attributes
# plus every source column whose name contains judge/government/agency/local_entity.
# unique() is required because the pattern also matches "caseCategory_government",
# which is already listed explicitly and would otherwise be requested twice.
cols_to_keep <- unique(c(
	"caseid", "caseTypeCode", "caseCategoryUnified", "caseCategoryRedux",
	"caseCategory_government", "caseCategory_private", "year", "year_factor",
	"petty_case", "countDocumentsByCourt", "lncountDocumentsByCourt",
	"days_elapsed", "claimSum_deflated", "claimSum_decile",
	"recoverySum_deflated", "was_appealed", "registrationDate", "date",
	"dispute_type", "local_entities_only",
	grep("judge|government|agency|local_entity", names(krasnodar_case_outcomes_side_instance), value = TRUE)
))

# Collapse to one row per case by keeping the value from the first row of each
# caseid group (assumes the input is ordered so that the first-instance row
# comes first within a case -- TODO confirm against the script that builds it).
# "caseid" is the grouping key and is returned by `by` itself, so it is left
# out of .SDcols to avoid selecting it a second time.
krasnodar_cases <- krasnodar_case_outcomes_side_instance[, lapply(.SD, function(x) x[[1L]]), by = "caseid", .SDcols = setdiff(cols_to_keep, "caseid")]
# Drop the side-level indicator columns that were picked up by the name pattern
krasnodar_cases[, c("government", "federal_agency", "regional_agency", "municipal_agency", "local_entity") := NULL]

##############################
# TABLE S2. Summary statistics at case level by year: Government vs. Private disputes, Krasnodar arbitrazh courts of the first instance

## A table with summary statistics (variables are named to preserve order).
## Computed over cases with involved_government == 1, one set of statistics per
## year. Shares are percentages rounded to one decimal; missing values are
## dropped from every mean and median.
government_year_table <- krasnodar_cases[ involved_government == 1, list(
	`01_total_government_cases` = sum(involved_government),
	`01a_involving_federal_agency` = round(100 * mean(involved_federal_agency, na.rm = TRUE), 1),
	`01b_involving_regional_agency` = round(100 * mean(involved_regional_agency, na.rm = TRUE), 1),
	`01c_involving_municipal_agency` = round(100 * mean(involved_municipal_agency, na.rm = TRUE), 1),
	`02_count_admin_cases` = sum(caseTypeCode == "administrative"),
	`03_count_civil_cases` = sum(caseTypeCode == "civil"),
	`04_share_local_entities` = round(100 * mean(local_entities_only, na.rm = TRUE), 1),
	# Median over strictly positive claim sums only, divided by 1000
	`05_median_claim_sum` = round(median(ifelse(claimSum_deflated > 0, claimSum_deflated, NA), na.rm = TRUE) / 1000, 1),
	`06_median_days_to_decision` = round(median(days_elapsed, na.rm = TRUE), 1),
	`07_median_court_documents_per_case` = round(median(countDocumentsByCourt, na.rm = TRUE), 1),
	`11_share_petty_cases` = round(100 * mean(petty_case, na.rm = TRUE), 1),
	# Petty-case shares within the cases involving each agency level
	`11a_share_petty_cases_federal_agency` = round(100 * mean(ifelse(involved_federal_agency == 1, petty_case, NA), na.rm = TRUE), 1),
	`11b_share_petty_cases_regional_agency` = round(100 * mean(ifelse(involved_regional_agency == 1, petty_case, NA), na.rm = TRUE), 1),
	# Label was "11b_...", duplicating the regional prefix; "11c" matches the a/b/c pattern used elsewhere
	`11c_share_petty_cases_municipal_agency` = round(100 * mean(ifelse(involved_municipal_agency == 1, petty_case, NA), na.rm = TRUE), 1),
	`12_share_government_plaintiff` = round(100 * mean(government_plaintiff, na.rm = TRUE), 1),
	`12a_share_federal_agency_plaintiff` = round(100 * mean(federal_agency_plaintiff, na.rm = TRUE), 1),
	`12b_share_regional_agency_plaintiff` = round(100 * mean(regional_agency_plaintiff, na.rm = TRUE), 1),
	`12c_share_municipal_agency_plaintiff` = round(100 * mean(municipal_agency_plaintiff, na.rm = TRUE), 1),
	`13a_share_tax` = round(100 * mean(caseCategory_government == "Taxes, duties, and dues", na.rm = TRUE), 1),
	`13b_share_othercivil` = round(100 * mean(caseCategory_government == "Other civil law", na.rm = TRUE), 1),
	`13c_share_adminoffences` = round(100 * mean(caseCategory_government == "Administrative offences", na.rm = TRUE), 1),
	`13d_share_otherpublic` = round(100 * mean(caseCategory_government == "Other public law", na.rm = TRUE), 1),
	`13e_share_contractbreach` = round(100 * mean(caseCategory_government == "Contract breach", na.rm = TRUE), 1),
	`13f_share_othercontract` = round(100 * mean(caseCategory_government == "Other contract-related", na.rm = TRUE), 1),
	`13g_share_other` = round(100 * mean(caseCategory_government == "Other", na.rm = TRUE), 1),
	`14_share_appealed` = round(100 * mean(was_appealed, na.rm = TRUE), 1),
	# NOTE(review): a missing presiding_judge is counted as one distinct value here -- confirm this is intended
	`15_count_judges` = uniqueN(presiding_judge),
	# Distinct judges among cases flagged presiding_judge_krasnodar == 1.
	# The previous `uniqueN(ifelse(...)) - 1` assumed an NA was always present
	# and undercounted by one for a year in which every case carried the flag.
	`16a_count_judges_krasnodar` = uniqueN(presiding_judge[which(presiding_judge_krasnodar == 1)], na.rm = TRUE),
	`17a_share_cases_by_judges_krasnodar` = round(100 * mean(presiding_judge_krasnodar, na.rm = TRUE), 1),
	`18_share_government_win` = round(100 * mean(government_win_first, na.rm = TRUE), 1),
	`18a_share_federal_agency_win` = round(100 * mean(federal_agency_win_first, na.rm = TRUE), 1),
	`18b_share_regional_agency_win` = round(100 * mean(regional_agency_win_first, na.rm = TRUE), 1),
	`18c_share_municipal_agency_win` = round(100 * mean(municipal_agency_win_first, na.rm = TRUE), 1)
), by = "year"]
setorderv(government_year_table, "year")
# Reshape to one row per statistic and one column per year; value.var is
# named explicitly so dcast() does not have to guess the value column
government_year_table <- dcast(melt(government_year_table, id.vars = "year"), variable ~ year, value.var = "value")

# Export the table
fwrite(government_year_table, file = "tables/tableS2_government_year_table_krasnodar.csv")

##############################
# TABLE S3. Summary statistics at case level by year: Private disputes where one of the parties is not local, Krasnodar arbitrazh courts of first instance

## A table with summary statistics (variables are named to preserve order).
## Computed over cases with involved_government == 0 and local_entities_only == 0,
## one set of statistics per year. Shares are percentages rounded to one
## decimal; missing values are dropped from every mean and median.
private_year_table <- krasnodar_cases[ involved_government == 0 & local_entities_only == 0, list(
	`01_count_cases` = .N,
	# Median over strictly positive claim sums only, divided by 1000
	`02_median_claim_sum` = round(median(ifelse(claimSum_deflated > 0, claimSum_deflated, NA), na.rm = TRUE) / 1000, 1),
	`03_median_days_to_decision` = round(median(days_elapsed, na.rm = TRUE), 1),
	`04_median_court_documents_per_case` = round(median(countDocumentsByCourt, na.rm = TRUE), 1),
	`05_share_petty_cases` = round(100 * mean(petty_case, na.rm = TRUE), 1),
	`06a_share_breachbanking` = round(100 * mean(caseCategory_private == "Contract breach: insurance/banking", na.rm = TRUE), 1),
	`06b_share_breachsupply` = round(100 * mean(caseCategory_private == "Contract breach: supply", na.rm = TRUE), 1),
	`06c_share_other` = round(100 * mean(caseCategory_private == "Other", na.rm = TRUE), 1),
	`06d_share_othercontract` = round(100 * mean(caseCategory_private == "Other contract-related", na.rm = TRUE), 1),
	`06e_share_breachworks` = round(100 * mean(caseCategory_private == "Contract breach: works", na.rm = TRUE), 1),
	`06f_share_breachservices` = round(100 * mean(caseCategory_private == "Contract breach: services", na.rm = TRUE), 1),
	`06g_share_breachenergy` = round(100 * mean(caseCategory_private == "Contract breach: energy", na.rm = TRUE), 1),
	`07_share_local_plaintiff` = round(100 * mean(local_entity_plaintiff, na.rm = TRUE), 1),
	`08_share_appealed` = round(100 * mean(was_appealed, na.rm = TRUE), 1),
	# NOTE(review): a missing presiding_judge is counted as one distinct value here -- confirm this is intended
	`09_count_judges` = uniqueN(presiding_judge),
	# Distinct judges among cases flagged presiding_judge_krasnodar == 1.
	# The previous `uniqueN(ifelse(...)) - 1` assumed an NA was always present
	# and undercounted by one for a year in which every case carried the flag.
	`10a_count_judges_krasnodar` = uniqueN(presiding_judge[which(presiding_judge_krasnodar == 1)], na.rm = TRUE),
	`11a_share_cases_by_judges_krasnodar` = round(100 * mean(presiding_judge_krasnodar, na.rm = TRUE), 1),
	`12_share_local_entity_win` = round(100 * mean(local_entity_win_first, na.rm = TRUE), 1)
), by = "year"]
setorderv(private_year_table, "year")
# Reshape to one row per statistic and one column per year; value.var is
# named explicitly so dcast() does not have to guess the value column
private_year_table <- dcast(melt(private_year_table, id.vars = "year"), variable ~ year, value.var = "value")

# Export the table
fwrite(private_year_table, file = "tables/tableS3_private_year_table_krasnodar.csv")
